PMID:27494614

install required packages

install.packages("geepack",repos="http://cran.us.r-project.org")
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages(c("lme4","mi","MASS"),repos="http://cran.us.r-project.org")
## Installing packages into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("unbalanced",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("crossval",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("ada",repos="http://cran.us.r-project.org")
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("e1071",repos="http://cran.us.r-project.org")
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("rpart",repos="http://cran.us.r-project.org")
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("class",repos="http://cran.us.r-project.org")
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages(c("RWeka","rJava"),repos="http://cran.us.r-project.org")
## Installing packages into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
##   There is a binary version available but the source version is
##   later:
##       binary source needs_compilation
## RWeka 0.4-26 0.4-29             FALSE
## 
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
## installing the source package 'RWeka'
install.packages("lme4",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("geepack",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("mi",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
install.packages("unbalanced",repos="http://cran.us.r-project.org") 
## Installing package into '/Users/MingT/Library/R/3.3/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/s8/0_mb4tr92mlfhspqc783k8z80000gn/T//RtmpY7DEpX/downloaded_packages
options(warn=-1) 

data manipulation

setwd("~/documents/PPMI") #set working director
clinical<-read.csv("ppmi_clinical.csv",header=TRUE) #read raw data
colnames(clinical)[2]<-"FID_IID" 
ppmi_new<-read.csv("PPMI_GSA_clinical.csv",header=TRUE)
merged_dataset<-merge(ppmi_new,clinical,by="FID_IID",all.x=TRUE) #merge clinical data with brain image
write.csv(merged_dataset,"ppmi_clinical_complete.csv")
ppmi<-merged_dataset[,c(1:399,820:823,833,832,816)] #Select "tier 1" UPDRSs

longitudinal analysis

ppmi_longit<-read.csv("ppmi_top_wide_UPDRS.csv",header=TRUE)
clinical<-read.csv("ppmi_clinical.csv",header=TRUE)
merged_dataset<-merge(ppmi_longit,clinical,by="FID_IID",all.x=TRUE)
ppmi_long<-merged_dataset[,c(1:342,351:354,364,365,347)]

Longtitudinal analysis with different time points of UPDRS measurements

# use complete dataset with the result
DataMissOm<-na.omit(subset(ppmi_long,select=c(time_visit,PD,FID_IID,COGSTATE,COGDECLN,FNCDTCOG,COGDXCL,EDUCYRS,L_superior_parietal_gyrus_ComputeArea
                                              ,L_superior_parietal_gyrus_Volume,R_superior_parietal_gyrus_ComputeArea,R_superior_parietal_gyrus_Volume,L_putamen_ComputeArea,L_putamen_Volume,R_putamen_Volume 
                                              ,R_putamen_ShapeIndex,L_caudate_ComputeArea,L_caudate_Volume,R_caudate_ComputeArea,R_caudate_Volume,chr12_rs34637584_GT 
                                              ,chr17_rs11868035_GT,chr17_rs11012_GT,chr17_rs393152_GT,chr17_rs12185268_GT,chr17_rs199533_GT,Sex,Weight,Age,UPDRS_part_I,UPDRS_part_II,UPDRS_part_III)))
#normalize continuous variables
for(i in c(8:20,28:32)){
  DataMissOm[,i]<-scale(DataMissOm[,i])
}

GEE

library(geepack)
DataMissOm$COGSTATE<-as.numeric(DataMissOm$COGSTATE)
DataMissOm$COGDECLN<-as.numeric(DataMissOm$COGDECLN)
DataMissOm$FNCDTCOG<-as.numeric(DataMissOm$FNCDTCOG)
DataMissOm$COGDXCL<-as.numeric(DataMissOm$COGDXCL)
model_gee_UPDRS<-geeglm(PD~L_superior_parietal_gyrus_ComputeArea
                        +L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume 
                        +R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT 
                        +chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age
                        +UPDRS_part_I+UPDRS_part_II+UPDRS_part_III+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS,
                        data=DataMissOm,waves=time_visit,family="binomial",id=FID_IID,corstr="exchangeable",scale.fix=TRUE)
summary(model_gee_UPDRS)
## 
## Call:
## geeglm(formula = PD ~ L_superior_parietal_gyrus_ComputeArea + 
##     L_superior_parietal_gyrus_Volume + R_superior_parietal_gyrus_ComputeArea + 
##     R_superior_parietal_gyrus_Volume + L_putamen_ComputeArea + 
##     L_putamen_Volume + R_putamen_Volume + R_putamen_ShapeIndex + 
##     L_caudate_ComputeArea + L_caudate_Volume + R_caudate_ComputeArea + 
##     R_caudate_Volume + chr12_rs34637584_GT + chr17_rs11868035_GT + 
##     chr17_rs11012_GT + chr17_rs393152_GT + chr17_rs12185268_GT + 
##     chr17_rs199533_GT + Sex + Weight + Age + UPDRS_part_I + UPDRS_part_II + 
##     UPDRS_part_III + FID_IID + COGSTATE + COGDECLN + FNCDTCOG + 
##     COGDXCL + EDUCYRS, family = "binomial", data = DataMissOm, 
##     id = FID_IID, waves = time_visit, corstr = "exchangeable", 
##     scale.fix = TRUE)
## 
##  Coefficients:
##                                         Estimate    Std.err     Wald
## (Intercept)                            1.787e+17  6.714e+14 70831.65
## L_superior_parietal_gyrus_ComputeArea -4.700e+17  2.633e+15 31867.29
## L_superior_parietal_gyrus_Volume       5.226e+17  2.836e+15 33961.30
## R_superior_parietal_gyrus_ComputeArea  3.618e+17  1.968e+15 33785.88
## R_superior_parietal_gyrus_Volume      -4.222e+17  2.330e+15 32820.75
## L_putamen_ComputeArea                 -1.330e+17  9.865e+14 18176.48
## L_putamen_Volume                       2.165e+17  1.166e+15 34476.22
## R_putamen_Volume                      -7.282e+16  4.836e+14 22677.60
## R_putamen_ShapeIndex                  -3.817e+15  1.923e+14   393.72
## L_caudate_ComputeArea                 -2.169e+17  1.567e+15 19144.96
## L_caudate_Volume                       1.112e+17  1.159e+15  9199.90
## R_caudate_ComputeArea                  9.570e+16  1.045e+15  8379.14
## R_caudate_Volume                       1.932e+16  7.656e+14   636.44
## chr12_rs34637584_GT                   -1.266e+17  1.230e+15 10595.16
## chr17_rs11868035_GT                    2.584e+16  3.206e+14  6497.19
## chr17_rs11012_GT                       2.435e+15  6.828e+14    12.71
## chr17_rs393152_GT                      1.212e+16  7.579e+14   255.77
## chr17_rs12185268_GT                   -1.241e+17  1.290e+15  9252.73
## chr17_rs199533_GT                      7.307e+16  8.266e+14  7814.16
## Sex                                    5.194e+16  6.038e+14  7398.75
## Weight                                -1.305e+16  2.352e+14  3075.64
## Age                                   -1.208e+16  2.022e+14  3568.28
## UPDRS_part_I                           1.019e+16  1.301e+14  6133.81
## UPDRS_part_II                          2.386e+15  2.648e+14    81.22
## UPDRS_part_III                         5.018e+16  3.686e+14 18537.46
## FID_IID                               -3.435e+13  3.649e+11  8860.45
## COGSTATE                               5.995e+15  2.795e+14   460.15
## COGDECLN                              -2.453e+16  3.557e+14  4753.03
## FNCDTCOG                              -1.571e+17  7.565e+14 43103.79
## COGDXCL                                1.578e+16  6.795e+14   539.41
## EDUCYRS                                3.086e+16  2.071e+14 22193.59
##                                       Pr(>|W|)    
## (Intercept)                            < 2e-16 ***
## L_superior_parietal_gyrus_ComputeArea  < 2e-16 ***
## L_superior_parietal_gyrus_Volume       < 2e-16 ***
## R_superior_parietal_gyrus_ComputeArea  < 2e-16 ***
## R_superior_parietal_gyrus_Volume       < 2e-16 ***
## L_putamen_ComputeArea                  < 2e-16 ***
## L_putamen_Volume                       < 2e-16 ***
## R_putamen_Volume                       < 2e-16 ***
## R_putamen_ShapeIndex                   < 2e-16 ***
## L_caudate_ComputeArea                  < 2e-16 ***
## L_caudate_Volume                       < 2e-16 ***
## R_caudate_ComputeArea                  < 2e-16 ***
## R_caudate_Volume                       < 2e-16 ***
## chr12_rs34637584_GT                    < 2e-16 ***
## chr17_rs11868035_GT                    < 2e-16 ***
## chr17_rs11012_GT                      0.000363 ***
## chr17_rs393152_GT                      < 2e-16 ***
## chr17_rs12185268_GT                    < 2e-16 ***
## chr17_rs199533_GT                      < 2e-16 ***
## Sex                                    < 2e-16 ***
## Weight                                 < 2e-16 ***
## Age                                    < 2e-16 ***
## UPDRS_part_I                           < 2e-16 ***
## UPDRS_part_II                          < 2e-16 ***
## UPDRS_part_III                         < 2e-16 ***
## FID_IID                                < 2e-16 ***
## COGSTATE                               < 2e-16 ***
## COGDECLN                               < 2e-16 ***
## FNCDTCOG                               < 2e-16 ***
## COGDXCL                                < 2e-16 ***
## EDUCYRS                                < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##        Estimate   Std.err
## alpha 2.217e+15 1.086e+30
## Number of clusters:   406   Maximum cluster size: 12

GLMM

library(lme4)
## Loading required package: Matrix
model_lmm_UPDRS<-glmer(PD~L_superior_parietal_gyrus_ComputeArea
                       +L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume 
                       +R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT 
                       +chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age+UPDRS_part_I+UPDRS_part_II+UPDRS_part_III+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS
                       +(1|FID_IID),control = glmerControl(optCtrl=list(maxfun=20000)),
                       data=DataMissOm,family="binomial")
summary(model_lmm_UPDRS)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## PD ~ L_superior_parietal_gyrus_ComputeArea + L_superior_parietal_gyrus_Volume +  
##     R_superior_parietal_gyrus_ComputeArea + R_superior_parietal_gyrus_Volume +  
##     L_putamen_ComputeArea + L_putamen_Volume + R_putamen_Volume +  
##     R_putamen_ShapeIndex + L_caudate_ComputeArea + L_caudate_Volume +  
##     R_caudate_ComputeArea + R_caudate_Volume + chr12_rs34637584_GT +  
##     chr17_rs11868035_GT + chr17_rs11012_GT + chr17_rs393152_GT +  
##     chr17_rs12185268_GT + chr17_rs199533_GT + Sex + Weight +  
##     Age + UPDRS_part_I + UPDRS_part_II + UPDRS_part_III + FID_IID +  
##     COGSTATE + COGDECLN + FNCDTCOG + COGDXCL + EDUCYRS + (1 |  
##     FID_IID)
##    Data: DataMissOm
## Control: glmerControl(optCtrl = list(maxfun = 20000))
## 
##      AIC      BIC   logLik deviance df.resid 
##    193.8    380.9    -64.9    129.8     2530 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.682  0.000  0.000  0.000  0.706 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  FID_IID (Intercept) 36.3     6.03    
## Number of obs: 2562, groups:  FID_IID, 406
## 
## Fixed effects:
##                                        Estimate Std. Error z value
## (Intercept)                           -7.82e+01   6.52e+04    0.00
## L_superior_parietal_gyrus_ComputeArea -1.94e+00   6.84e+00   -0.28
## L_superior_parietal_gyrus_Volume       2.34e+00   7.22e+00    0.32
## R_superior_parietal_gyrus_ComputeArea -1.36e-01   6.28e+00   -0.02
## R_superior_parietal_gyrus_Volume       1.31e+00   6.66e+00    0.20
## L_putamen_ComputeArea                 -3.63e+00   5.75e+00   -0.63
## L_putamen_Volume                       2.84e+00   5.15e+00    0.55
## R_putamen_Volume                      -2.95e-01   3.41e+00   -0.09
## R_putamen_ShapeIndex                  -2.06e-01   1.35e+00   -0.15
## L_caudate_ComputeArea                 -6.77e+00   7.93e+00   -0.85
## L_caudate_Volume                       3.38e+00   7.58e+00    0.45
## R_caudate_ComputeArea                  5.30e+00   8.53e+00    0.62
## R_caudate_Volume                      -2.08e+00   7.78e+00   -0.27
## chr12_rs34637584_GT                   -4.51e+00   4.94e+02   -0.01
## chr17_rs11868035_GT                   -4.92e-01   1.65e+00   -0.30
## chr17_rs11012_GT                       7.34e-01   3.40e+00    0.22
## chr17_rs393152_GT                      2.92e+00   3.43e+00    0.85
## chr17_rs12185268_GT                    3.02e+00   5.76e+00    0.52
## chr17_rs199533_GT                     -2.90e+00   4.92e+00   -0.59
## Sex                                    1.09e+00   2.73e+00    0.40
## Weight                                 1.88e+00   1.35e+00    1.40
## Age                                    1.83e+00   1.33e+00    1.38
## UPDRS_part_I                           3.29e-01   1.70e+00    0.19
## UPDRS_part_II                         -1.28e+01   4.02e+00   -3.19
## UPDRS_part_III                        -2.09e+01   5.74e+00   -3.65
## FID_IID                               -1.60e-03   3.31e-03   -0.48
## COGSTATE                               9.42e+00   8.01e+00    1.18
## COGDECLN                              -4.48e+00   4.12e+00   -1.09
## FNCDTCOG                               8.71e+00   6.52e+04    0.00
## COGDXCL                               -4.27e+00   5.06e+00   -0.84
## EDUCYRS                                3.68e-02   1.29e+00    0.03
##                                       Pr(>|z|)    
## (Intercept)                            0.99904    
## L_superior_parietal_gyrus_ComputeArea  0.77698    
## L_superior_parietal_gyrus_Volume       0.74594    
## R_superior_parietal_gyrus_ComputeArea  0.98267    
## R_superior_parietal_gyrus_Volume       0.84431    
## L_putamen_ComputeArea                  0.52797    
## L_putamen_Volume                       0.58167    
## R_putamen_Volume                       0.93113    
## R_putamen_ShapeIndex                   0.87846    
## L_caudate_ComputeArea                  0.39352    
## L_caudate_Volume                       0.65557    
## R_caudate_ComputeArea                  0.53451    
## R_caudate_Volume                       0.78918    
## chr12_rs34637584_GT                    0.99272    
## chr17_rs11868035_GT                    0.76636    
## chr17_rs11012_GT                       0.82913    
## chr17_rs393152_GT                      0.39461    
## chr17_rs12185268_GT                    0.60026    
## chr17_rs199533_GT                      0.55589    
## Sex                                    0.68982    
## Weight                                 0.16203    
## Age                                    0.16687    
## UPDRS_part_I                           0.84611    
## UPDRS_part_II                          0.00140 ** 
## UPDRS_part_III                         0.00026 ***
## FID_IID                                0.62892    
## COGSTATE                               0.23984    
## COGDECLN                               0.27759    
## FNCDTCOG                               0.99989    
## COGDXCL                                0.39832    
## EDUCYRS                                0.97715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 31 > 12.
## Use print(x, correlation=TRUE)  or
##   vcov(x)     if you need it
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
## failure to converge in 20000 evaluations
length(unique(DataMissOm$FID_IID)) #406 patients
## [1] 406
nrow(DataMissOm) #2562 observations
## [1] 2562

Longtitudinal analysis with different time point of brain imaging measures

ppmi_base<-read.csv("ppmi_ROI_baseline.csv",header=TRUE)
ppmi_base<-ppmi_base[,c(2,24:26,29:33)] #may add 27 28
ppmi_new<-read.csv("PPMI_GSA_clinical.csv",header=TRUE)
ppmi_imaging<-ppmi_new[,1:335]

merged_dataset<-merge(ppmi_imaging,ppmi_base,by="FID_IID",all.x=TRUE)
DataMissOm<-na.omit(subset(merged_dataset,select=c(VisitID,ResearchGroup,FID_IID,COGSTATE,COGDECLN,FNCDTCOG,COGDXCL,EDUCYRS,L_superior_parietal_gyrus_ComputeArea
                                                   ,L_superior_parietal_gyrus_Volume,R_superior_parietal_gyrus_ComputeArea,R_superior_parietal_gyrus_Volume,L_putamen_ComputeArea,L_putamen_Volume,R_putamen_Volume 
                                                   ,R_putamen_ShapeIndex,L_caudate_ComputeArea,L_caudate_Volume,R_caudate_ComputeArea,R_caudate_Volume,chr12_rs34637584_GT 
                                                   ,chr17_rs11868035_GT,chr17_rs11012_GT,chr17_rs393152_GT,chr17_rs12185268_GT,chr17_rs199533_GT,Sex,Weight,Age,UPDRS_Part_I_Summary_Score_Baseline,UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline,UPDRS_Part_III_Summary_Score_Baseline)))
#normalize continuous variables
for(i in c(9:20,28:32)){
  DataMissOm[,i]<-scale(DataMissOm[,i])
}
library(geepack)
DataMissOm$COGSTATE<-as.numeric(DataMissOm$COGSTATE)
DataMissOm$COGDECLN<-as.numeric(DataMissOm$COGDECLN)
DataMissOm$FNCDTCOG<-as.numeric(DataMissOm$FNCDTCOG)
DataMissOm$COGDXCL<-as.numeric(DataMissOm$COGDXCL)
DataMissOm$PD<-ifelse(DataMissOm$ResearchGroup=="Control",0,1)
model_gee_imaging<-geeglm(PD~L_superior_parietal_gyrus_ComputeArea
                          +L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume 
                          +R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT 
                          +chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age
                          +UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline+UPDRS_Part_III_Summary_Score_Baseline+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS,
                          data=DataMissOm,waves=VisitID,family="binomial",id=FID_IID,corstr="exchangeable",scale.fix=TRUE)
summary(model_gee_imaging)
## 
## Call:
## geeglm(formula = PD ~ L_superior_parietal_gyrus_ComputeArea + 
##     L_superior_parietal_gyrus_Volume + R_superior_parietal_gyrus_ComputeArea + 
##     R_superior_parietal_gyrus_Volume + L_putamen_ComputeArea + 
##     L_putamen_Volume + R_putamen_Volume + R_putamen_ShapeIndex + 
##     L_caudate_ComputeArea + L_caudate_Volume + R_caudate_ComputeArea + 
##     R_caudate_Volume + chr12_rs34637584_GT + chr17_rs11868035_GT + 
##     chr17_rs11012_GT + chr17_rs393152_GT + chr17_rs12185268_GT + 
##     chr17_rs199533_GT + Sex + Weight + Age + UPDRS_Part_I_Summary_Score_Baseline + 
##     UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline + 
##     UPDRS_Part_III_Summary_Score_Baseline + FID_IID + COGSTATE + 
##     COGDECLN + FNCDTCOG + COGDXCL + EDUCYRS, family = "binomial", 
##     data = DataMissOm, id = FID_IID, waves = VisitID, corstr = "exchangeable", 
##     scale.fix = TRUE)
## 
##  Coefficients:
##                                                             Estimate
## (Intercept)                                                 3.95e+02
## L_superior_parietal_gyrus_ComputeArea                       2.21e+02
## L_superior_parietal_gyrus_Volume                           -2.66e+02
## R_superior_parietal_gyrus_ComputeArea                       1.19e+01
## R_superior_parietal_gyrus_Volume                            4.60e+01
## L_putamen_ComputeArea                                       1.77e+02
## L_putamen_Volume                                           -4.79e+01
## R_putamen_Volume                                           -1.25e+02
## R_putamen_ShapeIndex                                        1.95e+01
## L_caudate_ComputeArea                                      -3.39e+01
## L_caudate_Volume                                            1.78e+02
## R_caudate_ComputeArea                                      -3.14e+01
## R_caudate_Volume                                           -9.44e+01
## chr12_rs34637584_GT                                        -5.51e+02
## chr17_rs11868035_GT                                        -7.33e+01
## chr17_rs11012_GT                                            1.02e+02
## chr17_rs393152_GT                                          -2.71e+02
## chr17_rs12185268_GT                                         1.61e+02
## chr17_rs199533_GT                                          -2.52e+01
## Sex                                                         1.35e+02
## Weight                                                      4.50e+01
## Age                                                        -5.73e+01
## UPDRS_Part_I_Summary_Score_Baseline                        -1.20e+02
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline  6.32e+02
## UPDRS_Part_III_Summary_Score_Baseline                       5.96e+02
## FID_IID                                                     1.56e-01
## COGSTATE                                                   -6.57e+01
## COGDECLN                                                    4.47e+02
## FNCDTCOG                                                   -8.99e+02
## COGDXCL                                                     9.39e+01
## EDUCYRS                                                     9.21e+00
##                                                              Std.err
## (Intercept)                                                 4.34e+00
## L_superior_parietal_gyrus_ComputeArea                       3.06e+00
## L_superior_parietal_gyrus_Volume                            3.17e+00
## R_superior_parietal_gyrus_ComputeArea                       1.48e+00
## R_superior_parietal_gyrus_Volume                            1.38e+00
## L_putamen_ComputeArea                                       1.22e+00
## L_putamen_Volume                                            9.56e-01
## R_putamen_Volume                                            1.15e+00
## R_putamen_ShapeIndex                                        5.92e-01
## L_caudate_ComputeArea                                       2.33e+00
## L_caudate_Volume                                            2.10e+00
## R_caudate_ComputeArea                                       1.74e+00
## R_caudate_Volume                                            1.90e+00
## chr12_rs34637584_GT                                         9.80e-01
## chr17_rs11868035_GT                                         4.87e-01
## chr17_rs11012_GT                                            1.14e+00
## chr17_rs393152_GT                                           9.68e-01
## chr17_rs12185268_GT                                         1.51e+00
## chr17_rs199533_GT                                           8.56e-01
## Sex                                                         9.16e-01
## Weight                                                      3.21e-01
## Age                                                         3.02e-01
## UPDRS_Part_I_Summary_Score_Baseline                         3.93e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline  3.20e-01
## UPDRS_Part_III_Summary_Score_Baseline                       3.99e-01
## FID_IID                                                     9.25e-04
## COGSTATE                                                    1.15e+00
## COGDECLN                                                    1.58e+00
## FNCDTCOG                                                    2.18e+00
## COGDXCL                                                     1.48e+00
## EDUCYRS                                                     1.02e-01
##                                                                Wald
## (Intercept)                                                8.27e+03
## L_superior_parietal_gyrus_ComputeArea                      5.24e+03
## L_superior_parietal_gyrus_Volume                           7.06e+03
## R_superior_parietal_gyrus_ComputeArea                      6.43e+01
## R_superior_parietal_gyrus_Volume                           1.11e+03
## L_putamen_ComputeArea                                      2.12e+04
## L_putamen_Volume                                           2.51e+03
## R_putamen_Volume                                           1.18e+04
## R_putamen_ShapeIndex                                       1.09e+03
## L_caudate_ComputeArea                                      2.11e+02
## L_caudate_Volume                                           7.15e+03
## R_caudate_ComputeArea                                      3.26e+02
## R_caudate_Volume                                           2.46e+03
## chr12_rs34637584_GT                                        3.17e+05
## chr17_rs11868035_GT                                        2.27e+04
## chr17_rs11012_GT                                           8.00e+03
## chr17_rs393152_GT                                          7.83e+04
## chr17_rs12185268_GT                                        1.15e+04
## chr17_rs199533_GT                                          8.69e+02
## Sex                                                        2.17e+04
## Weight                                                     1.97e+04
## Age                                                        3.59e+04
## UPDRS_Part_I_Summary_Score_Baseline                        9.24e+04
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 3.91e+06
## UPDRS_Part_III_Summary_Score_Baseline                      2.24e+06
## FID_IID                                                    2.83e+04
## COGSTATE                                                   3.26e+03
## COGDECLN                                                   8.00e+04
## FNCDTCOG                                                   1.70e+05
## COGDXCL                                                    4.03e+03
## EDUCYRS                                                    8.17e+03
##                                                            Pr(>|W|)    
## (Intercept)                                                 < 2e-16 ***
## L_superior_parietal_gyrus_ComputeArea                       < 2e-16 ***
## L_superior_parietal_gyrus_Volume                            < 2e-16 ***
## R_superior_parietal_gyrus_ComputeArea                       1.1e-15 ***
## R_superior_parietal_gyrus_Volume                            < 2e-16 ***
## L_putamen_ComputeArea                                       < 2e-16 ***
## L_putamen_Volume                                            < 2e-16 ***
## R_putamen_Volume                                            < 2e-16 ***
## R_putamen_ShapeIndex                                        < 2e-16 ***
## L_caudate_ComputeArea                                       < 2e-16 ***
## L_caudate_Volume                                            < 2e-16 ***
## R_caudate_ComputeArea                                       < 2e-16 ***
## R_caudate_Volume                                            < 2e-16 ***
## chr12_rs34637584_GT                                         < 2e-16 ***
## chr17_rs11868035_GT                                         < 2e-16 ***
## chr17_rs11012_GT                                            < 2e-16 ***
## chr17_rs393152_GT                                           < 2e-16 ***
## chr17_rs12185268_GT                                         < 2e-16 ***
## chr17_rs199533_GT                                           < 2e-16 ***
## Sex                                                         < 2e-16 ***
## Weight                                                      < 2e-16 ***
## Age                                                         < 2e-16 ***
## UPDRS_Part_I_Summary_Score_Baseline                         < 2e-16 ***
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline  < 2e-16 ***
## UPDRS_Part_III_Summary_Score_Baseline                       < 2e-16 ***
## FID_IID                                                     < 2e-16 ***
## COGSTATE                                                    < 2e-16 ***
## COGDECLN                                                    < 2e-16 ***
## FNCDTCOG                                                    < 2e-16 ***
## COGDXCL                                                     < 2e-16 ***
## EDUCYRS                                                     < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate  Std.err
## alpha 3.68e-07 2.62e-06
## Number of clusters:   423   Maximum cluster size: 3

baseline analysis

ppmi_1<-ppmi[ppmi$VisitID==1,]
ppmi_1_complete<-ppmi_1[,c(1,
                           3:282, # imaging index
                           283:284,287, #demographic index
                           288,291,294,297,300,303, #genotype
                           336,348,360,384,392, #UPDRS+asessment non motor may delete 372
                           400:404, #clinical index
                           405:406)]
ppmi_1_complete$ind<-is.na(ppmi_1_complete$chr17_rs199533_GT)
ppmi_1_complete<-ppmi_1_complete[ppmi_1_complete$ind==FALSE,-303]
ppmi_MI<-ppmi_1_complete[,c(291:300)]

Multiple Imputation

library(mi)
## Loading required package: stats4
## mi (Version 1.0, packaged: 2015-04-16 14:03:10 UTC; goodrich)
## mi  Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Trustees of Columbia University
## This program comes with ABSOLUTELY NO WARRANTY.
## This is free software, and you are welcome to redistribute it
## under the General Public License version 2 or later.
## Execute RShowDoc('COPYING') for details.
set.seed(1414)
mdf<-missing_data.frame(ppmi_MI)
## NOTE: The following pairs of variables appear to have the same missingness pattern.
##  Please verify whether they are in fact logically distinct variables.
##      [,1]       [,2]      
## [1,] "COGDECLN" "FNCDTCOG"
## [2,] "COGDECLN" "COGDXCL" 
## [3,] "FNCDTCOG" "COGDXCL"
summary(mdf)
##  UPDRS_Part_I_Summary_Score_Baseline
##  Min.   : 0.00                      
##  1st Qu.: 0.00                      
##  Median : 1.00                      
##  Mean   : 1.06                      
##  3rd Qu.: 2.00                      
##  Max.   :10.00                      
##  NA's   :2                          
##  UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline
##  Min.   : 0.00                                             
##  1st Qu.: 1.00                                             
##  Median : 3.00                                             
##  Mean   : 4.41                                             
##  3rd Qu.: 7.00                                             
##  Max.   :25.00                                             
##  NA's   :3                                                 
##  UPDRS_Part_III_Summary_Score_Baseline
##  Min.   : 0                           
##  1st Qu.: 3                           
##  Median :16                           
##  Mean   :15                           
##  3rd Qu.:23                           
##  Max.   :47                           
##  NA's   :2                            
##  X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline
##  Min.   : 0.00                                                         
##  1st Qu.: 3.00                                                         
##  Median : 6.00                                                         
##  Mean   : 6.02                                                         
##  3rd Qu.: 8.00                                                         
##  Max.   :19.00                                                         
##  NA's   :4                                                             
##  X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline
##  Min.   : 1.00                                                                     
##  1st Qu.: 5.00                                                                     
##  Median : 5.00                                                                     
##  Mean   : 5.23                                                                     
##  3rd Qu.: 6.00                                                                     
##  Max.   :15.00                                                                     
##  NA's   :1                                                                         
##                                COGSTATE    COGDECLN   FNCDTCOG  
##  1930                              :  0   No   :374   No  :399  
##  1941                              :  0   Right:  0   Yes :  9  
##  1955                              :  0   Yes  : 34   NA's: 15  
##  Dementia (PDD)                    :  5   NA's : 15             
##  Mild Cognitive Impairment (PD-MCI): 52                         
##  Normal Cognition (PD-NC)          :350                         
##  NA's                              : 16                         
##        COGDXCL       EDUCYRS    
##  10% - 49% :  5   Min.   : 5.0  
##  50% - 89% : 57   1st Qu.:14.0  
##  90% - 100%:346   Median :16.0  
##  NA's      : 15   Mean   :16.1  
##                   3rd Qu.:18.0  
##                   Max.   :26.0  
## 
imputation<-mi(mdf)
complete<-complete(imputation) 
c4<-complete[[4]] #extract complete dataset from it from imputed datasets. 
# in this case, we randomly chose 4th. 
c4<-c4[,1:10]
ppmi_1<-cbind(ppmi_1_complete[,1:290],c4,ppmi_1_complete[,301:302]) #complete dataset 

ppmi_1_ROI<-ppmi_1[,c(1,23,24 ,28,29,33,34,38,39,143,144,148,149,282:302)] #complete dataset of regions of interest

write.csv(ppmi_1,"ppmi_complete_baseline.csv")
write.csv(ppmi_1_ROI,"ppmi_ROI_baseline.csv")

explore the distribution of baseline data

#categorical
col_n<-c(14,17:22,28:31)
#ppmi_1_ROI$APPRDX<-as.numeric(ppmi_1_ROI$APPRDX)

# frequency table
for (i in col_n){
  print(colnames(ppmi_1_ROI[i]))
  m<-table(ppmi_1_ROI[,i],ppmi_1_ROI[,33])
  print(m)
  print(fisher.test(m)$p.value)
}
## [1] "Sex"
##    
##      HC  PD
##   1  84 193
##   2  39 107
## [1] 0.499
## [1] "chr12_rs34637584_GT"
##    
##      HC  PD
##   0 123 296
##   1   0   4
## [1] 0.327
## [1] "chr17_rs11868035_GT"
##    
##      HC  PD
##   0  67 142
##   1  40 127
##   2  16  31
## [1] 0.16
## [1] "chr17_rs11012_GT"
##    
##      HC  PD
##   0  84 204
##   1  37  86
##   2   2  10
## [1] 0.716
## [1] "chr17_rs393152_GT"
##    
##      HC  PD
##   0  77 186
##   1  40  99
##   2   6  15
## [1] 1
## [1] "chr17_rs12185268_GT"
##    
##      HC  PD
##   0  80 189
##   1  39  98
##   2   4  13
## [1] 0.888
## [1] "chr17_rs199533_GT"
##    
##      HC  PD
##   0  82 193
##   1  39  96
##   2   2  11
## [1] 0.598
## [1] "COGSTATE"
##                                     
##                                       HC  PD
##   Dementia (PDD)                       0   5
##   Mild Cognitive Impairment (PD-MCI)   1  51
##   Normal Cognition (PD-NC)           122 244
## [1] 1e-07
## [1] "COGDECLN"
##      
##        HC  PD
##   No  122 267
##   Yes   1  33
## [1] 0.000114
## [1] "FNCDTCOG"
##      
##        HC  PD
##   No  123 288
##   Yes   0  12
## [1] 0.0221
## [1] "COGDXCL"
##             
##               HC  PD
##   10% - 49%    0   5
##   50% - 89%    5  52
##   90% - 100% 118 243
## [1] 8.04e-05
for (i in col_n){
  print(colnames(ppmi_1_ROI[i]))
  m<-table(ppmi_1_ROI[,i],ppmi_1_ROI[,34])
  print(m)
  print(fisher.test(m)$p.value)
}
## [1] "Sex"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   1            0              84                 170    23
##   2            0              39                  93    14
## [1] 0.711
## [1] "chr12_rs34637584_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0             123                 259    37
##   1            0               0                   4     0
## [1] 0.523
## [1] "chr17_rs11868035_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0              67                 127    15
##   1            0              40                 107    20
##   2            0              16                  29     2
## [1] 0.189
## [1] "chr17_rs11012_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0              84                 176    28
##   1            0              37                  78     8
##   2            0               2                   9     1
## [1] 0.733
## [1] "chr17_rs393152_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0              77                 160    26
##   1            0              40                  89    10
##   2            0               6                  14     1
## [1] 0.905
## [1] "chr17_rs12185268_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0              80                 163    26
##   1            0              39                  88    10
##   2            0               4                  12     1
## [1] 0.906
## [1] "chr17_rs199533_GT"
##    
##      psychiatric Healthy Control Parkinson's disease SWEDD
##   0            0              82                 167    26
##   1            0              39                  86    10
##   2            0               2                  10     1
## [1] 0.779
## [1] "COGSTATE"
##                                     
##                                       psychiatric Healthy Control
##   Dementia (PDD)                                0               0
##   Mild Cognitive Impairment (PD-MCI)            0               1
##   Normal Cognition (PD-NC)                      0             122
##                                     
##                                      Parkinson's disease SWEDD
##   Dementia (PDD)                                       5     0
##   Mild Cognitive Impairment (PD-MCI)                  40    11
##   Normal Cognition (PD-NC)                           218    26
## [1] 7.6e-08
## [1] "COGDECLN"
##      
##        psychiatric Healthy Control Parkinson's disease SWEDD
##   No             0             122                 235    32
##   Yes            0               1                  28     5
## [1] 0.000223
## [1] "FNCDTCOG"
##      
##        psychiatric Healthy Control Parkinson's disease SWEDD
##   No             0             123                 254    34
##   Yes            0               0                   9     3
## [1] 0.00897
## [1] "COGDXCL"
##             
##               psychiatric Healthy Control Parkinson's disease SWEDD
##   10% - 49%             0               0                   5     0
##   50% - 89%             0               5                  45     7
##   90% - 100%            0             118                 213    30
## [1] 0.000547
#continuous
#normality test
col_n<-c(2:13,15,16,23:27,32)
aa<-matrix(NA,20,1)
k<-1
for(i in col_n){
  aa[k,1]<-shapiro.test(ppmi_1_ROI[,i])$p.value
  k<-k+1
}
k<-1
#for(i in col_n){
# x<-ppmi_1_ROI[ppmi_1_ROI$RECRUITMENT_CAT=="HC",i] 
#  y<-ppmi_1_ROI[ppmi_1_ROI$RECRUITMENT_CAT=="PD",i] 
#  aa[k,2]<-t.test(x,y)$p.value
#  k<-k+1
#}

aa<-as.data.frame(aa)
rownames(aa)<-colnames(ppmi_1_ROI)[col_n]
colnames(aa)<-c("normality-test (>0.1 normal)")
print(aa)
##                                                                                    normality-test (>0.1 normal)
## L_caudate_ComputeArea                                                                                  3.65e-10
## L_caudate_Volume                                                                                       9.34e-12
## R_caudate_ComputeArea                                                                                  5.56e-18
## R_caudate_Volume                                                                                       8.53e-15
## L_putamen_ComputeArea                                                                                  3.16e-08
## L_putamen_Volume                                                                                       1.64e-07
## R_putamen_ComputeArea                                                                                  5.34e-18
## R_putamen_Volume                                                                                       9.21e-14
## L_superior_parietal_gyrus_ComputeArea                                                                  5.55e-15
## L_superior_parietal_gyrus_Volume                                                                       3.26e-12
## R_superior_parietal_gyrus_ComputeArea                                                                  9.98e-16
## R_superior_parietal_gyrus_Volume                                                                       5.92e-12
## Weight                                                                                                 7.37e-04
## Age                                                                                                    3.22e-03
## UPDRS_Part_I_Summary_Score_Baseline                                                                    4.81e-25
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                                             2.25e-18
## UPDRS_Part_III_Summary_Score_Baseline                                                                  2.95e-24
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline                                 3.10e-10
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline                     1.12e-19
## EDUCYRS                                                                                                3.97e-08
write.csv(aa,"continuous_descriptive.csv")

model-based analysis

Logistic Regression

#logistic regression
for(i in c(2:13,15,16,23:27,32)){
  ppmi_1_ROI[,i]<-scale(ppmi_1_ROI[,i])
}
ppmi_1_ROI<-as.data.frame(ppmi_1_ROI)
levels(ppmi_1_ROI$RECRUITMENT_CAT)<-c(0,1)
ppmi_1_ROI$RECRUITMENT_CAT<-as.numeric(ppmi_1_ROI$RECRUITMENT_CAT)
ppmi_1_ROI$RECRUITMENT_CAT<-ppmi_1_ROI$RECRUITMENT_CAT-1
logistic_ROI<-glm(RECRUITMENT_CAT~.,data=ppmi_1_ROI[,-c(1,34)],maxit=50,family="binomial")
library(MASS)
stepp<-stepAIC(logistic_ROI,trace=FALSE) #stepwise logistic regression
summary(logistic_ROI)
## 
## Call:
## glm(formula = RECRUITMENT_CAT ~ ., family = "binomial", data = ppmi_1_ROI[, 
##     -c(1, 34)], maxit = 50)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -5.669  -0.147   0.001   0.075   1.910  
## 
## Coefficients:
##                                                                                     Estimate
## (Intercept)                                                                         1.71e+01
## L_caudate_ComputeArea                                                               1.28e+00
## L_caudate_Volume                                                                   -3.71e-01
## R_caudate_ComputeArea                                                              -1.99e+00
## R_caudate_Volume                                                                    6.75e-01
## L_putamen_ComputeArea                                                              -4.27e-01
## L_putamen_Volume                                                                    9.21e-01
## R_putamen_ComputeArea                                                               4.31e-01
## R_putamen_Volume                                                                   -5.46e-01
## L_superior_parietal_gyrus_ComputeArea                                               2.19e+00
## L_superior_parietal_gyrus_Volume                                                   -1.39e+00
## R_superior_parietal_gyrus_ComputeArea                                              -6.42e-01
## R_superior_parietal_gyrus_Volume                                                    1.10e-01
## Sex                                                                                -2.54e-01
## Weight                                                                             -2.29e-01
## Age                                                                                -2.92e-01
## chr12_rs34637584_GT                                                                 1.67e+01
## chr17_rs11868035_GT                                                                 7.71e-02
## chr17_rs11012_GT                                                                    6.12e-01
## chr17_rs393152_GT                                                                  -1.39e+00
## chr17_rs12185268_GT                                                                 9.54e-01
## chr17_rs199533_GT                                                                  -6.71e-02
## UPDRS_Part_I_Summary_Score_Baseline                                                 6.22e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                          7.59e+00
## UPDRS_Part_III_Summary_Score_Baseline                                               1.74e+00
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline             -2.54e-01
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline  5.76e-03
## COGSTATEMild Cognitive Impairment (PD-MCI)                                          4.59e+00
## COGSTATENormal Cognition (PD-NC)                                                    1.11e+00
## COGDECLNYes                                                                         3.33e+00
## FNCDTCOGYes                                                                         1.26e+01
## COGDXCL50% - 89%                                                                   -1.32e+01
## COGDXCL90% - 100%                                                                  -1.17e+01
## EDUCYRS                                                                            -1.41e-01
##                                                                                    Std. Error
## (Intercept)                                                                          4.41e+03
## L_caudate_ComputeArea                                                                1.77e+00
## L_caudate_Volume                                                                     1.64e+00
## R_caudate_ComputeArea                                                                1.97e+00
## R_caudate_Volume                                                                     1.81e+00
## L_putamen_ComputeArea                                                                8.98e-01
## L_putamen_Volume                                                                     1.03e+00
## R_putamen_ComputeArea                                                                1.59e+00
## R_putamen_Volume                                                                     1.58e+00
## L_superior_parietal_gyrus_ComputeArea                                                1.53e+00
## L_superior_parietal_gyrus_Volume                                                     1.57e+00
## R_superior_parietal_gyrus_ComputeArea                                                1.30e+00
## R_superior_parietal_gyrus_Volume                                                     1.37e+00
## Sex                                                                                  6.15e-01
## Weight                                                                               2.70e-01
## Age                                                                                  2.56e-01
## chr12_rs34637584_GT                                                                  3.62e+03
## chr17_rs11868035_GT                                                                  3.58e-01
## chr17_rs11012_GT                                                                     8.83e-01
## chr17_rs393152_GT                                                                    7.90e-01
## chr17_rs12185268_GT                                                                  1.24e+00
## chr17_rs199533_GT                                                                    9.76e-01
## UPDRS_Part_I_Summary_Score_Baseline                                                  3.94e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                           1.33e+00
## UPDRS_Part_III_Summary_Score_Baseline                                                4.92e-01
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline               2.98e-01
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline   2.25e-01
## COGSTATEMild Cognitive Impairment (PD-MCI)                                           2.75e+03
## COGSTATENormal Cognition (PD-NC)                                                     2.75e+03
## COGDECLNYes                                                                          2.19e+00
## FNCDTCOGYes                                                                          1.79e+03
## COGDXCL50% - 89%                                                                     3.45e+03
## COGDXCL90% - 100%                                                                    3.45e+03
## EDUCYRS                                                                              2.40e-01
##                                                                                    z value
## (Intercept)                                                                           0.00
## L_caudate_ComputeArea                                                                 0.72
## L_caudate_Volume                                                                     -0.23
## R_caudate_ComputeArea                                                                -1.01
## R_caudate_Volume                                                                      0.37
## L_putamen_ComputeArea                                                                -0.48
## L_putamen_Volume                                                                      0.90
## R_putamen_ComputeArea                                                                 0.27
## R_putamen_Volume                                                                     -0.35
## L_superior_parietal_gyrus_ComputeArea                                                 1.43
## L_superior_parietal_gyrus_Volume                                                     -0.89
## R_superior_parietal_gyrus_ComputeArea                                                -0.49
## R_superior_parietal_gyrus_Volume                                                      0.08
## Sex                                                                                  -0.41
## Weight                                                                               -0.85
## Age                                                                                  -1.14
## chr12_rs34637584_GT                                                                   0.00
## chr17_rs11868035_GT                                                                   0.22
## chr17_rs11012_GT                                                                      0.69
## chr17_rs393152_GT                                                                    -1.76
## chr17_rs12185268_GT                                                                   0.77
## chr17_rs199533_GT                                                                    -0.07
## UPDRS_Part_I_Summary_Score_Baseline                                                   1.58
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                            5.69
## UPDRS_Part_III_Summary_Score_Baseline                                                 3.54
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline               -0.85
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline    0.03
## COGSTATEMild Cognitive Impairment (PD-MCI)                                            0.00
## COGSTATENormal Cognition (PD-NC)                                                      0.00
## COGDECLNYes                                                                           1.52
## FNCDTCOGYes                                                                           0.01
## COGDXCL50% - 89%                                                                      0.00
## COGDXCL90% - 100%                                                                     0.00
## EDUCYRS                                                                              -0.59
##                                                                                    Pr(>|z|)
## (Intercept)                                                                         0.99692
## L_caudate_ComputeArea                                                               0.46936
## L_caudate_Volume                                                                    0.82124
## R_caudate_ComputeArea                                                               0.31426
## R_caudate_Volume                                                                    0.70948
## L_putamen_ComputeArea                                                               0.63453
## L_putamen_Volume                                                                    0.36976
## R_putamen_ComputeArea                                                               0.78679
## R_putamen_Volume                                                                    0.72889
## L_superior_parietal_gyrus_ComputeArea                                               0.15192
## L_superior_parietal_gyrus_Volume                                                    0.37614
## R_superior_parietal_gyrus_ComputeArea                                               0.62105
## R_superior_parietal_gyrus_Volume                                                    0.93613
## Sex                                                                                 0.67939
## Weight                                                                              0.39717
## Age                                                                                 0.25433
## chr12_rs34637584_GT                                                                 0.99632
## chr17_rs11868035_GT                                                                 0.82968
## chr17_rs11012_GT                                                                    0.48804
## chr17_rs393152_GT                                                                   0.07762
## chr17_rs12185268_GT                                                                 0.44316
## chr17_rs199533_GT                                                                   0.94519
## UPDRS_Part_I_Summary_Score_Baseline                                                 0.11463
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                          1.2e-08
## UPDRS_Part_III_Summary_Score_Baseline                                               0.00041
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline              0.39392
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline  0.97955
## COGSTATEMild Cognitive Impairment (PD-MCI)                                          0.99867
## COGSTATENormal Cognition (PD-NC)                                                    0.99968
## COGDECLNYes                                                                         0.12861
## FNCDTCOGYes                                                                         0.99438
## COGDXCL50% - 89%                                                                    0.99694
## COGDXCL90% - 100%                                                                   0.99729
## EDUCYRS                                                                             0.55634
##                                                                                       
## (Intercept)                                                                           
## L_caudate_ComputeArea                                                                 
## L_caudate_Volume                                                                      
## R_caudate_ComputeArea                                                                 
## R_caudate_Volume                                                                      
## L_putamen_ComputeArea                                                                 
## L_putamen_Volume                                                                      
## R_putamen_ComputeArea                                                                 
## R_putamen_Volume                                                                      
## L_superior_parietal_gyrus_ComputeArea                                                 
## L_superior_parietal_gyrus_Volume                                                      
## R_superior_parietal_gyrus_ComputeArea                                                 
## R_superior_parietal_gyrus_Volume                                                      
## Sex                                                                                   
## Weight                                                                                
## Age                                                                                   
## chr12_rs34637584_GT                                                                   
## chr17_rs11868035_GT                                                                   
## chr17_rs11012_GT                                                                      
## chr17_rs393152_GT                                                                  .  
## chr17_rs12185268_GT                                                                   
## chr17_rs199533_GT                                                                     
## UPDRS_Part_I_Summary_Score_Baseline                                                   
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline                         ***
## UPDRS_Part_III_Summary_Score_Baseline                                              ***
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline                
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline    
## COGSTATEMild Cognitive Impairment (PD-MCI)                                            
## COGSTATENormal Cognition (PD-NC)                                                      
## COGDECLNYes                                                                           
## FNCDTCOGYes                                                                           
## COGDXCL50% - 89%                                                                      
## COGDXCL90% - 100%                                                                     
## EDUCYRS                                                                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 510.01  on 422  degrees of freedom
## Residual deviance: 137.64  on 389  degrees of freedom
## AIC: 205.6
## 
## Number of Fisher Scoring iterations: 18
summary(stepp)
## 
## Call:
## glm(formula = RECRUITMENT_CAT ~ L_caudate_ComputeArea + R_caudate_ComputeArea + 
##     L_superior_parietal_gyrus_ComputeArea + L_superior_parietal_gyrus_Volume + 
##     Age + chr17_rs393152_GT + chr17_rs12185268_GT + UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline + 
##     UPDRS_Part_III_Summary_Score_Baseline + COGSTATE, family = "binomial", 
##     data = ppmi_1_ROI[, -c(1, 34)], maxit = 50)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -6.647  -0.192   0.003   0.087   1.774  
## 
## Coefficients:
##                                                            Estimate
## (Intercept)                                                  15.844
## L_caudate_ComputeArea                                         1.086
## R_caudate_ComputeArea                                        -1.075
## L_superior_parietal_gyrus_ComputeArea                         1.939
## L_superior_parietal_gyrus_Volume                             -1.669
## Age                                                          -0.439
## chr17_rs393152_GT                                            -1.076
## chr17_rs12185268_GT                                           1.293
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline    6.766
## UPDRS_Part_III_Summary_Score_Baseline                         1.919
## COGSTATEMild Cognitive Impairment (PD-MCI)                   -6.212
## COGSTATENormal Cognition (PD-NC)                            -10.460
##                                                            Std. Error
## (Intercept)                                                  1214.607
## L_caudate_ComputeArea                                           0.470
## R_caudate_ComputeArea                                           0.495
## L_superior_parietal_gyrus_ComputeArea                           1.128
## L_superior_parietal_gyrus_Volume                                1.152
## Age                                                             0.222
## chr17_rs393152_GT                                               0.673
## chr17_rs12185268_GT                                             0.783
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline      1.145
## UPDRS_Part_III_Summary_Score_Baseline                           0.445
## COGSTATEMild Cognitive Impairment (PD-MCI)                   1214.608
## COGSTATENormal Cognition (PD-NC)                             1214.607
##                                                            z value
## (Intercept)                                                   0.01
## L_caudate_ComputeArea                                         2.31
## R_caudate_ComputeArea                                        -2.17
## L_superior_parietal_gyrus_ComputeArea                         1.72
## L_superior_parietal_gyrus_Volume                             -1.45
## Age                                                          -1.98
## chr17_rs393152_GT                                            -1.60
## chr17_rs12185268_GT                                           1.65
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline    5.91
## UPDRS_Part_III_Summary_Score_Baseline                         4.31
## COGSTATEMild Cognitive Impairment (PD-MCI)                   -0.01
## COGSTATENormal Cognition (PD-NC)                             -0.01
##                                                            Pr(>|z|)    
## (Intercept)                                                   0.990    
## L_caudate_ComputeArea                                         0.021 *  
## R_caudate_ComputeArea                                         0.030 *  
## L_superior_parietal_gyrus_ComputeArea                         0.086 .  
## L_superior_parietal_gyrus_Volume                              0.147    
## Age                                                           0.048 *  
## chr17_rs393152_GT                                             0.110    
## chr17_rs12185268_GT                                           0.098 .  
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline  3.5e-09 ***
## UPDRS_Part_III_Summary_Score_Baseline                       1.6e-05 ***
## COGSTATEMild Cognitive Impairment (PD-MCI)                    0.996    
## COGSTATENormal Cognition (PD-NC)                              0.993    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 510.01  on 422  degrees of freedom
## Residual deviance: 145.16  on 411  degrees of freedom
## AIC: 169.2
## 
## Number of Fisher Scoring iterations: 16

Create Complete dataset with idea disease categories

ppmi_1_PD1<-ppmi_1[ppmi_1$APPRDX!="psychiatric",]
ppmi_1_PD1<-ppmi_1[ppmi_1$APPRDX!="SWEDD",]
ppmi_1_PD1<-ppmi_1_PD1[,-c(1,302)]
ppmi_1_PD2<-ppmi_1[ppmi_1$APPRDX!="psychiatric",]
ppmi_1_PD2<-ppmi_1[,-c(1,302)]

ppmi_1_PD1$PD1<-ifelse(ppmi_1_PD1$RECRUITMENT_CAT=="HC",1,0)
ppmi_1_PD1<-ppmi_1_PD1[,-300]
ppmi_1_PD1$PD1<-as.factor(ppmi_1_PD1$PD1)

Balance cases

#PD1=PD PD2=PD+SWEDD
set.seed(623)

library(unbalanced)
## Loading required package: mlr
## Loading required package: BBmisc
## Loading required package: ggplot2
## Loading required package: ParamHelpers
## Loading required package: stringi
## Loading required package: foreach
## Loading required package: doParallel
## Loading required package: iterators
## Loading required package: parallel
n<-ncol(ppmi_1_PD1)
output<-ppmi_1_PD1$PD1
input<-ppmi_1_PD1[ ,-n]

#balance the dataset
data<-ubBalance(X= input, Y=output,type="ubSMOTE", verbose=TRUE)
## Proportion of positives after ubSMOTE : 42.9 % of 861 observations
balancedData<-cbind(data$X,data$Y)
ppmi_1_PD1_balanced<-balancedData
colnames(ppmi_1_PD1_balanced)<-colnames(ppmi_1_PD1)


ppmi_1_PD2$PD2<-ifelse(ppmi_1_PD2$RECRUITMENT_CAT=="HC",1,0)
ppmi_1_PD2<-ppmi_1_PD2[,-300]
ppmi_1_PD2$PD2<-as.factor(ppmi_1_PD2$PD2)
#balance cases
set.seed(623)
require(unbalanced)
n<-ncol(ppmi_1_PD2)
output<-ppmi_1_PD2$PD2
input<-ppmi_1_PD2[ ,-n]

#balance the dataset
data<-ubBalance(X= input, Y=output,type="ubSMOTE", verbose=TRUE)
## Proportion of positives after ubSMOTE : 42.9 % of 861 observations
balancedData<-cbind(data$X,data$Y)
ppmi_1_PD2_balanced<-balancedData
colnames(ppmi_1_PD2_balanced)<-colnames(ppmi_1_PD2)

#numerize the 4 datasets
for(i in 295:298){
  ppmi_1_PD1[,i]<-as.numeric(ppmi_1_PD1[,i])
}
for(i in 295:298){
  ppmi_1_PD2[,i]<-as.numeric(ppmi_1_PD2[,i])
}
for(i in 295:298){
  ppmi_1_PD1_balanced[,i]<-as.numeric(ppmi_1_PD1_balanced[,i])
}
for(i in 295:298){
  ppmi_1_PD2_balanced[,i]<-as.numeric(ppmi_1_PD2_balanced[,i])
}

write.csv(ppmi_1_PD1,"PD1_un.csv")
write.csv(ppmi_1_PD2,"PD2_un.csv")
write.csv(ppmi_1_PD1_balanced,"PD1_b.csv")
write.csv(ppmi_1_PD2_balanced,"PD2_b.csv")

Prediction Functions

library(crossval)
## 
## Attaching package: 'crossval'
## The following object is masked from 'package:mlr':
## 
##     crossval
#adaboost
library(ada)
## Loading required package: rpart
library(rpart)
my.ada <- function (train.x, train.y, test.x, test.y, negative, formula){
  ada.fit <- ada(train.x, train.y)
  predict.y <- predict(ada.fit, test.x)
  #count TP,NP..
  out <- confusionMatrix(test.y, predict.y, negative = negative)
  return (out)
}


# svm
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:mlr':
## 
##     impute
my.svm <- function (train.x, train.y, test.x, test.y, negative, formula){
  svm.fit <- svm(train.x, train.y)
  predict.y <- predict(svm.fit, test.x)
  #count TP,NP..
  out <- confusionMatrix(test.y, predict.y, negative = negative)
  return (out)
}
# naive bayesian
my.nb <- function (train.x, train.y, test.x, test.y, negative, formula){
  nb.fit <- naiveBayes(train.x, train.y)
  predict.y <- predict(nb.fit, test.x)
  #count TP,NP..
  out <- confusionMatrix(test.y, predict.y, negative = negative)
  return (out)
}
# decision trees

library(rpart)
my.detree <- function (train.x, train.y, test.x, test.y, negative, formula){
  train.matrix<-cbind(train.x,train.y)
  detree.fit <- rpart(train.y~.,method="class",data=train.matrix)
  predict.y <- predict(detree.fit, test.x)
  out <- confusionMatrix(test.y, predict.y, negative = negative)
  return (out)
}
#k-Nearest Neighbor Classiï¬cation

library("class")
my.knn <- function (train.x, train.y, test.x, test.y, negative, formula){
  train.matrix<-cbind(train.x,train.y)
  test.matrix<-cbind(test.x,test.y)
  predict.y <- knn(train=train.matrix,test=test.matrix,cl=train.y,k=2)
  out <- confusionMatrix(test.y, predict.y, negative = negative)
  return (out)
}
# Kmeans
my.kmeans <- function (train.x, train.y, test.x, test.y, negative, formula){
  train.matrix<-cbind(train.x,train.y)
  test.matrix<-cbind(test.x,test.y)
  kmeans.fit <- kmeans(train.x,2)
  predict.y <- kmeans.fit$cluster-1
  out <- confusionMatrix(test.y,predict.y, negative = negative)
  return (out)
}

function dealt with complete dataset

classfi<-function(a=my.ada){
  a1<-matrix(NA,4,4)
  a2<-matrix(NA,4,6)
  #PD1 unbalanced
  X <- ppmi_1_PD1[, 1:299]
  Y <- ppmi_1_PD1[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  a1[1,]<-cv.out$stat
  a2[1,]<-out
  
  
  
  #PD2 unbalanced
  X <- ppmi_1_PD2[, 1:299]
  Y <- ppmi_1_PD2[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  
  a1[2,]<-cv.out$stat
  a2[2,]<-out
  
  #PD1 balanced
  X <- ppmi_1_PD1_balanced[, 1:299]
  Y <- ppmi_1_PD1_balanced[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  
  a1[3,]<-cv.out$stat
  a2[3,]<-out
  
  #PD2 balanced
  X <- ppmi_1_PD2_balanced[, 1:299]
  Y <- ppmi_1_PD2_balanced[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  a1[4,]<-cv.out$stat
  a2[4,]<-out
  classification<-as.data.frame(cbind(a1,a2))
  
  classification$Group<-c("PD1_un","PD2_un","PD1_b","PD2_b")
  colnames(classification)[1:10]<-c("FP","TP","TN","FN","acc","sens","spec","ppv","npv","lor")
  write.csv(classification,"classification.csv")
  print(classification)
}

Classification result

adaboost

classfi()
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##    FP   TP   TN  FN   acc  sens  spec   ppv   npv  lor  Group
## 1 0.6 22.6 52.0 2.0 0.966 0.919 0.989 0.974 0.963 6.89 PD1_un
## 2 0.6 22.0 59.4 2.6 0.962 0.894 0.990 0.973 0.958 6.73 PD2_un
## 3 0.8 71.6 97.6 2.2 0.983 0.970 0.992 0.989 0.978 8.29  PD1_b
## 4 2.0 71.8 96.4 2.0 0.977 0.973 0.980 0.973 0.980 7.46  PD2_b

decision tree

classfi(a=my.detree)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##    FP    TP  TN  FN   acc  sens    spec   ppv   npv    lor  Group
## 1 104  44.4 1.0 4.8 0.294 0.902 0.00951 0.299 0.172 -2.422 PD1_un
## 2 120  49.2 0.0 0.0 0.291 1.000 0.00000 0.291   NaN    NaN PD2_un
## 3 195 146.8 1.4 0.8 0.430 0.995 0.00711 0.429 0.636  0.274  PD1_b
## 4 196 146.4 0.4 1.2 0.426 0.992 0.00203 0.427 0.250 -1.392  PD2_b

naive bayes

classfi(a=my.nb)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##     FP   TP   TN   FN   acc  sens  spec   ppv   npv   lor  Group
## 1 10.6  9.2 42.0 15.4 0.663 0.374 0.798 0.465 0.732 0.862 PD1_un
## 2 15.4 12.4 44.6 12.2 0.674 0.504 0.743 0.446 0.785 1.080 PD2_un
## 3 45.8 64.4 52.6  9.4 0.679 0.873 0.535 0.584 0.848 2.063  PD1_b
## 4 44.8 65.0 53.6  8.8 0.689 0.881 0.545 0.592 0.859 2.179  PD2_b

svm

classfi(a=my.svm)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##    FP   TP   TN   FN   acc   sens  spec   ppv   npv  lor  Group
## 1 0.4  4.8 52.2 19.8 0.738 0.1951 0.992 0.923 0.725 3.45 PD1_un
## 2 0.6  1.4 59.4 23.2 0.719 0.0569 0.990 0.700 0.719 1.79 PD2_un
## 3 2.0 69.2 96.4  4.6 0.962 0.9377 0.980 0.972 0.954 6.59  PD1_b
## 4 2.8 67.0 95.6  6.8 0.944 0.9079 0.972 0.960 0.934 5.82  PD2_b

knn

classfi(a=my.knn)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##     FP   TP   TN   FN   acc  sens  spec   ppv   npv    lor  Group
## 1 15.2  7.6 37.4 17.0 0.583 0.309 0.711 0.333 0.688 0.0953 PD1_un
## 2 17.2  7.2 42.8 17.4 0.591 0.293 0.713 0.295 0.711 0.0292 PD2_un
## 3 17.8 55.2 80.6 18.6 0.789 0.748 0.819 0.756 0.812 2.5981  PD1_b
## 4 20.4 56.2 78.0 17.6 0.779 0.762 0.793 0.734 0.816 2.5022  PD2_b

kmeans

classfi(a=my.kmeans)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##      FP    TP  TN    FN   acc  sens  spec   ppv   npv     lor  Group
## 1  87.6  38.4 124  58.4 0.527 0.397 0.587 0.305 0.681 -0.0685 PD1_un
## 2 114.2  44.6 127  52.6 0.507 0.459 0.527 0.281 0.707 -0.0587 PD2_un
## 3 163.6 126.0 232 167.6 0.519 0.429 0.586 0.435 0.580  0.0623  PD1_b
## 4 165.4 124.2 230 169.4 0.514 0.423 0.581 0.429 0.576  0.0185  PD2_b

function dealt with dataset without UPDRS (and clinical features)

classfi1<-function(a=my.ada){
  a1<-matrix(NA,4,4)
  a2<-matrix(NA,4,6)
  #PD1 unbalanced
  X <- ppmi_1_PD1[, 1:289]
  Y <- ppmi_1_PD1[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  a1[1,]<-cv.out$stat
  a2[1,]<-out
  
  
  
  #PD2 unbalanced
  X <- ppmi_1_PD2[, 1:289]
  Y <- ppmi_1_PD2[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  
  a1[2,]<-cv.out$stat
  a2[2,]<-out
  
  #PD1 balanced
  X <- ppmi_1_PD1_balanced[, 1:289]
  Y <- ppmi_1_PD1_balanced[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  
  a1[3,]<-cv.out$stat
  a2[3,]<-out
  
  #PD2 balanced
  X <- ppmi_1_PD2_balanced[, 1:289]
  Y <- ppmi_1_PD2_balanced[,300] 
  form <- "train.y~." 
  neg <- 0
  set.seed(714)
  cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
  out <- diagnosticErrors(cv.out$stat) 
  a1[4,]<-cv.out$stat
  a2[4,]<-out
  classification<-cbind(a1,a2)
  colnames(classification)[1:10]<-c("FP","TP","TN","FN","acc","sens","spec","ppv","npv","lor")
  write.csv(classification,"classification2.csv")
  print(classification)
}

adaboost

classfi1()
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##       FP   TP   TN   FN   acc   sens  spec   ppv   npv   lor
## [1,] 4.0  4.0 48.6 20.6 0.681 0.1626 0.924 0.500 0.702 0.858
## [2,] 4.2  2.0 55.8 22.6 0.683 0.0813 0.930 0.323 0.712 0.162
## [3,] 2.8 58.0 95.6 15.8 0.892 0.7859 0.972 0.954 0.858 4.831
## [4,] 4.6 55.8 93.8 18.0 0.869 0.7561 0.953 0.924 0.839 4.147

decision tree

classfi1(a=my.detree)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##       FP    TP  TN  FN   acc  sens   spec   ppv   npv     lor
## [1,] 100  47.4 5.0 1.8 0.339 0.963 0.0475 0.321 0.735  0.2731
## [2,] 115  48.2 5.0 1.0 0.314 0.980 0.0417 0.295 0.833  0.7399
## [3,] 187 139.8 9.8 7.8 0.434 0.947 0.0498 0.428 0.557 -0.0626
## [4,] 189 139.6 7.4 8.0 0.427 0.946 0.0376 0.424 0.481 -0.3830

naive bayes

classfi1(a=my.nb)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##        FP   TP   TN   FN   acc  sens  spec   ppv   npv   lor
## [1,] 13.0  7.8 39.6 16.8 0.614 0.317 0.753 0.375 0.702 0.347
## [2,] 16.6  8.8 43.4 15.8 0.617 0.358 0.723 0.346 0.733 0.376
## [3,] 72.4 64.2 26.0  9.6 0.524 0.870 0.264 0.470 0.730 0.876
## [4,] 70.6 64.6 27.8  9.2 0.537 0.875 0.283 0.478 0.751 1.017

svm

classfi1(a=my.svm)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##       FP   TP   TN   FN   acc    sens  spec   ppv   npv  lor
## [1,] 0.2  0.4 52.4 24.2 0.684 0.01626 0.996 0.667 0.684 1.47
## [2,] 0.4  0.2 59.6 24.4 0.707 0.00813 0.993 0.333 0.710 0.20
## [3,] 4.8 53.0 93.6 20.8 0.851 0.71816 0.951 0.917 0.818 3.91
## [4,] 4.2 50.4 94.2 23.4 0.840 0.68293 0.957 0.923 0.801 3.88

knn

classfi1(a=my.knn)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##        FP   TP   TN   FN   acc  sens  spec   ppv   npv    lor
## [1,] 15.2  7.6 37.4 17.0 0.583 0.309 0.711 0.333 0.688 0.0953
## [2,] 17.2  7.2 42.8 17.4 0.591 0.293 0.713 0.295 0.711 0.0292
## [3,] 17.8 55.2 80.6 18.6 0.789 0.748 0.819 0.756 0.812 2.5981
## [4,] 20.4 56.2 78.0 17.6 0.779 0.762 0.793 0.734 0.816 2.5022

kmeans

classfi1(a=my.kmeans)
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
## Number of folds: 5 
## Total number of CV fits: 5 
## 
## Round # 1 of 1 
## CV Fit # 1 of 5 
## CV Fit # 2 of 5 
## CV Fit # 3 of 5 
## CV Fit # 4 of 5 
## CV Fit # 5 of 5 
##         FP    TP  TN    FN   acc  sens  spec   ppv   npv     lor
## [1,]  87.6  38.4 124  58.4 0.527 0.397 0.587 0.305 0.681 -0.0685
## [2,] 114.2  44.6 127  52.6 0.507 0.459 0.527 0.281 0.707 -0.0587
## [3,] 163.6 126.0 232 167.6 0.519 0.429 0.586 0.435 0.580  0.0623
## [4,] 165.4 124.2 230 169.4 0.514 0.423 0.581 0.429 0.576  0.0185

adaboost scores and results

# Varplot, table S2 and table S3
require(ada)
#unbalanced PD1
set.seed(729) 
c1<-ada(PD1~.,data=ppmi_1_PD1)

n_c<-ncol(ppmi_1_PD1)-1
PD1_score<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","unBalanced")
write.csv(data_top30,"top30.csv")

#unbalanced PD2
set.seed(729) 
c1<-ada(PD2~.,data=ppmi_1_PD2)

n_c<-ncol(ppmi_1_PD2)-1
PD2_score<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","unBalanced")
write.csv(data_top30,"top30.csv")

#balanced PD1
set.seed(729) 
c1<-ada(PD1~.,data=ppmi_1_PD1_balanced)

n_c<-ncol(ppmi_1_PD1_balanced)-1
PD1_score_b<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","Balanced")
write.csv(data_top30,"top30.csv")


#balanced PD2
set.seed(729) 
c1<-ada(PD2~.,data=ppmi_1_PD2_balanced)

n_c<-ncol(ppmi_1_PD2_balanced)-1
PD2_score_b<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","Balanced")
write.csv(data_top30,"top30.csv")


write.csv(PD1_score,"trial1.csv")
write.csv(PD2_score,"trial2.csv")
write.csv(PD1_score_b,"trial3.csv")
write.csv(PD2_score_b,"trial4.csv")
########

without UPDRS

#unbalanced PD1
set.seed(729) 
c1<-ada(PD1~.,data=ppmi_1_PD1[,-c(290:294)])

n_c<-ncol(ppmi_1_PD1[,-c(290:294)])-1
PD1_score_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","unBalanced")
write.csv(data_top30,"top30.csv")

#unbalanced PD2
set.seed(729) 
c1<-ada(PD2~.,data=ppmi_1_PD2[,-c(290:294)])

n_c<-ncol(ppmi_1_PD2[,-c(290:294)])-1
PD2_score_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","unBalanced")
write.csv(data_top30,"top30.csv")


#balanced PD1
set.seed(729) 
c1<-ada(PD1~.,data=ppmi_1_PD1_balanced[,-c(290:294)])

n_c<-ncol(ppmi_1_PD1_balanced[,-c(290:294)])-1
PD1_score_b_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","Balanced")
write.csv(data_top30,"top30.csv")

#balanced PD2
set.seed(729) 
c1<-ada(PD2~.,data=ppmi_1_PD2_balanced[,-c(290:294)])

n_c<-ncol(ppmi_1_PD2_balanced[,-c(290:294)])-1
PD2_score_b_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","Balanced")
write.csv(data_top30,"top30.csv")

write.csv(PD1_score_noUPDRS,"trial5.csv")
write.csv(PD2_score_noUPDRS,"trial6.csv")
write.csv(PD1_score_b_noUPDRS,"trial7.csv")
write.csv(PD2_score_b_noUPDRS,"trial8.csv")